##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the league-table data (per the write-up, downloaded from FBref; the
# file name suggests seasons 2005-2020).
pl <- read.csv(
  file = "C:/Users/matth/Documents/Grad School/Spring 2022/STAA 566/Data/pl_table_05_20.csv"
)
# Count rows per squad to see which squads appear in every season.
table(pl[["Squad"]])
##
## Arsenal Aston Villa Birmingham City Blackburn Blackpool
## 16 13 4 7 1
## Bolton Bournemouth Brighton Burnley Cardiff City
## 7 5 4 7 2
## Charlton Ath Chelsea Crystal Palace Derby County Everton
## 2 16 8 1 16
## Fulham Huddersfield Hull City Leeds United Leicester City
## 11 2 5 1 7
## Liverpool Manchester City Manchester Utd Middlesbrough Newcastle Utd
## 16 16 16 5 14
## Norwich City Portsmouth QPR Reading Sheffield Utd
## 5 5 3 3 3
## Southampton Stoke City Sunderland Swansea City Tottenham
## 9 10 11 7 16
## Watford West Brom West Ham Wigan Athletic Wolves
## 6 11 15 8 6
# Keep only the squads that played in the Premier League in every season.
# The set is derived from the per-squad row counts rather than a hand-typed
# list of names, so it cannot drift out of sync with the input file.
# NOTE(review): assumes one row per squad per season and that at least one
# squad is present in every season (so the maximum count is the season count).
season.counts <- table(pl$Squad)
ever.present <- names(season.counts)[season.counts == max(season.counts)]
pl.best <- subset(pl, Squad %in% ever.present)

# Remove columns that have no data: only the first 11 columns are kept.
pl.best <- pl.best[, 1:11]

# Rename two abbreviated columns so they read well in the plot and tooltip.
names(pl.best)[names(pl.best) == "GD"] <- "Goal.Difference"
names(pl.best)[names(pl.best) == "Rk"] <- "End.of.Season.Rank"

# Key the data by squad so that hovering highlights a whole squad at once.
pl.highlight <- highlight_key(pl.best, ~Squad)
# Build the static ggplot: goal difference by year, one colour per squad,
# drawn as points plus a smoothed trend line for each squad.
pl.gg <- ggplot(
  data = pl.highlight,
  mapping = aes(
    x = Year,
    y = Goal.Difference,
    # Not drawn by either layer; mapped so the rank is available to the
    # plotly tooltip built from this plot.
    label = End.of.Season.Rank,
    color = Squad
  )
) +
  # The redundant empty aes() was dropped; the layer inherits the plot mapping.
  geom_point(alpha = 1, shape = 20, size = 1.5) +
  # Loess trend per squad, without confidence bands and with a thin line.
  geom_smooth(method = "loess", se = FALSE, lwd = .5) +
  ylab("Goal Difference (goals scored minus goals against)") +
  scale_y_continuous(breaks = seq(-20, 80, by = 20)) +
  theme_classic(base_size = 12) +
  # Place the legend inside the plotting area, towards the lower right.
  theme(legend.position = c(0.87, 0.25)) +
  # Title spelling corrected: the league is the "Premier League".
  labs(title = "Goal Difference of Seven Premier League Teams")
# Convert the ggplot into an interactive plotly widget. The tooltip shows the
# squad, goal difference and end-of-season rank; highlighting is switched on
# by hovering and switched off by a relayout event.
pl.plotly.highlight <- highlight(
  ggplotly(
    pl.gg,
    tooltip = c("Squad", "Goal.Difference", "End.of.Season.Rank")
  ),
  on = "plotly_hover",
  off = "plotly_relayout"
)
## `geom_smooth()` using formula 'y ~ x'
# Display the interactive plot. Evaluating the object at top level relies on
# R's auto-printing to show it.
pl.plotly.highlight
All of my data comes from https://fbref.com/en/comps/9/10728/stats/2020-2021-Premier-League-Stats, which compiles English Premier League statistics into downloadable tables. I quality-checked the information against the data on the Premier League’s official website, and it matched up.
I took the seven teams that played in the Premier League for all 16 seasons for which I pulled data. What I wanted to show is their trends over the seasons while also factoring in each team’s End of Season Rank. I was hoping to take into account the amount that each team spent to see if there was a correlation between budget and their result at the end of the season, but there was too much for me to take into account (injuries, playing time of each player, weather conditions in games, etc.).
The first thing that I added was the smooth lines to show the general trend for each team. I had originally done just a simple connect-the-dots, but some of the teams had a very inconsistent Goal Difference (see Manchester City from 2010 to 2015), so I decided to go with smooth lines for visual purposes.
The formatting that I chose was really just aesthetics to make sure that the plot wasn’t too painful to look at. The default smooth lines were too wide, so I narrowed them. I also chose the colors to make them distinct enough for someone who is colorblind (like me) to be able to distinguish them.
I also added the highlight feature, which highlights a team across all of its seasons when you hover over it. This feature was needed because, at first glance, the graph looks really messy, so the highlight really makes the graph legible.